import time
import requests
import pandas as pd
from .soup_utils import get_url_list, get_page_max_num
from .ranking_spider import get_img_page_info
from .spider_def import delay, headers


def get_img_page_url_list(page_num: int) -> list[str]:
    """Fetch the artwork-page URLs listed on one "newest works" index page.

    :param page_num: 1-based index page number to fetch.
    :return: List of artwork-page URLs on that page, or an empty list when
        ``page_num`` is beyond the site's last available index page.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    # Throttle between requests so we don't hammer the site.
    time.sleep(delay)
    url = f'https://www.vilipix.com/new?p={page_num}'
    print(f'[*] get url list from {url}')

    # An explicit timeout prevents the crawler from hanging forever on a
    # stalled connection (requests has NO default timeout).
    res = requests.get(url, headers=headers, timeout=30)
    # Fail loudly on HTTP errors instead of silently parsing an error page.
    res.raise_for_status()

    # Past the last index page: signal the caller to stop paginating.
    max_page_num = get_page_max_num(res.text)
    if page_num > max_page_num:
        return []
    return get_url_list(res.text, '.pix-card > .title > a')


def get_all_img_info(page_start: int = 1, page_stop: int = 114514) -> list[dict]:
    r"""Crawl the "newest works" listing and collect info for every artwork.

    :param page_start: First index page to crawl, defaults to 1.
    :param page_stop: Last index page to crawl; when left at the default the
        crawl simply continues until the site runs out of pages (with the
        default ``page_start`` that is 2000+ pages).
    :return: A list of info dicts, one per artwork encountered.
    """
    collected: list[dict] = []
    for page in range(page_start, page_stop + 1):
        page_urls = get_img_page_url_list(page)
        # An empty page means we walked past the last index page — stop.
        if not page_urls:
            break
        collected.extend(get_img_page_info(u) for u in page_urls)
    return collected


if __name__ == '__main__':
    # Smoke run: crawl only the first listing page and display the result.
    works = get_all_img_info(1, 1)
    frame = pd.DataFrame(works)
    print(frame)
    # frame.to_csv('new_works_info.csv')
